'''
Author: JiraiyaChen 49619401+JiraiyaChen@users.noreply.github.com
Date: 2023-11-21 16:54:27
LastEditors: JiraiyaChen 979526234@qq.com
LastEditTime: 2024-03-25 13:32:36
Description: 
'''
import scrapy
from scrapy import Selector, Request
from spider2107.items import UrlList


class DoubanSpider(scrapy.Spider):
    """Collect thread links from the paginated forum-21 listing on xc8866.cc.

    One request is issued per listing page. Each response produces a single
    ``UrlList`` item whose ``url`` field holds the list of thread hrefs found
    on that page, in document order. The hrefs are emitted exactly as they
    appear in the markup; they are not resolved against the response URL.

    NOTE(review): the class name refers to Douban although the spider targets
    xc8866.cc; the name is kept so existing imports keep working.
    """

    name = "suZhouUrlList"
    allowed_domains = ["xc8866.cc"]
    # Not used for seeding here because start_requests() is overridden below;
    # kept so the attribute remains available to anything that reads it.
    start_urls = ["https://xc8866.cc"]

    # Listing page URL; ``{page}`` is replaced with the page number.
    # Example: https://xc8866.cc/forum-21-2.htm?tagids=131_0_0_0
    LIST_URL_TEMPLATE = "https://xc8866.cc/forum-21-{page}.htm?tagids=131_0_0_0"
    # Number of listing pages requested (page numbers 0 .. PAGE_COUNT - 1).
    # NOTE(review): numbering starts at 0 -- confirm the site serves a
    # distinct "forum-21-0" page rather than aliasing page 1, otherwise the
    # first page's links are collected twice and the last page may be missed.
    PAGE_COUNT = 76

    def start_requests(self):
        """Yield one ``Request`` per listing page.

        Responses are handled by ``parse``, Scrapy's default callback.
        """
        for page in range(self.PAGE_COUNT):
            yield Request(url=self.LIST_URL_TEMPLATE.format(page=page))

    def parse(self, response):
        """Extract every thread link on a listing page into one ``UrlList``.

        Args:
            response: The downloaded listing page.

        Yields:
            A single ``UrlList`` item whose ``url`` field is the list of href
            values (possibly empty) of all ``<a>`` elements whose ``href``
            starts with ``"thread"``.
        """
        # Select the href attributes directly instead of selecting the <a>
        # nodes and querying each one again for its own href.
        thread_hrefs = response.xpath(
            '//a[starts-with(@href, "thread")]/@href'
        ).getall()

        url_list = UrlList()
        url_list['url'] = thread_hrefs
        yield url_list

